mawiesne commented on code in PR #1072:
URL: https://github.com/apache/opennlp/pull/1072#discussion_r3403897272
##########
opennlp-core/opennlp-ml/opennlp-dl/src/main/java/opennlp/dl/vectors/SentenceVectorsDL.java:
##########
@@ -72,38 +84,61 @@ public float[] getVectors(final String sentence) throws
OrtException {
final Map<String, OnnxTensor> inputs = new HashMap<>();
- inputs.put(INPUT_IDS, OnnxTensor.createTensor(env,
LongBuffer.wrap(tokens.ids()),
- new long[] {1, tokens.ids().length}));
-
- inputs.put(ATTENTION_MASK, OnnxTensor.createTensor(env,
- LongBuffer.wrap(tokens.mask()), new long[] {1, tokens.mask().length}));
+ try {
+ inputs.put(INPUT_IDS, OnnxTensor.createTensor(env,
LongBuffer.wrap(tokens.ids()),
+ new long[] {1, tokens.ids().length}));
- inputs.put(TOKEN_TYPE_IDS, OnnxTensor.createTensor(env,
- LongBuffer.wrap(tokens.types()), new long[] {1,
tokens.types().length}));
+ inputs.put(ATTENTION_MASK, OnnxTensor.createTensor(env,
+ LongBuffer.wrap(tokens.mask()), new long[] {1,
tokens.mask().length}));
- final float[][][] v = (float[][][]) session.run(inputs).get(0).getValue();
+ inputs.put(TOKEN_TYPE_IDS, OnnxTensor.createTensor(env,
+ LongBuffer.wrap(tokens.types()), new long[] {1,
tokens.types().length}));
- return v[0][0];
+ try (OrtSession.Result result = session.run(inputs)) {
+ // getValue() copies the tensor into Java arrays, so the result can be
closed safely.
+ final float[][][] v = (float[][][]) result.get(0).getValue();
+ return v[0][0];
+ }
+ } finally {
+ inputs.values().forEach(OnnxTensor::close);
+ }
}
- private Tokens tokenize(final String text, Tokenizer tokenizer, Map<String,
Integer> vocab) {
+ /**
+ * Encodes text as model inputs: wordpiece token ids, an attention mask of
ones,
+ * and single-segment (all zero) token type ids.
+ *
+ * @param text The text to encode.
+ * @param tokenizer The wordpiece tokenizer matching the {@code vocab}.
+ * @param vocab The vocabulary map.
+ * @return The encoded {@link Tokens}.
+ *
+ * @throws IllegalArgumentException Thrown if the tokenizer emits a token
that is
+ * not present in the vocabulary.
+ */
+ static Tokens tokenize(final String text, final Tokenizer tokenizer,
Review Comment:
Thanks for giving a rationale / explanation.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]