This is an automated email from the ASF dual-hosted git repository.
joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new 680d91c Use STX and ETX for start end seq chars in normalizer
680d91c is described below
commit 680d91cbde4a279117320cf90a2613ccfa9fe2fa
Author: Jörn Kottmann <[email protected]>
AuthorDate: Fri Feb 1 11:06:34 2019 +0100
Use STX and ETX for start end seq chars in normalizer
---
.../src/main/java/org/apache/opennlp/normalizer/Normalizer.java | 2 +-
tf-ner-poc/src/main/python/normalizer/normalizer.py | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java b/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
index 5629a06..f0261fe 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
@@ -39,7 +39,7 @@ import org.tensorflow.Tensor;
public class Normalizer {
- private static final char END_MARKER = 'E';
+ private static final char END_MARKER = 3;
private final Session session;
private final Map<Character, Integer> sourceCharMap;
diff --git a/tf-ner-poc/src/main/python/normalizer/normalizer.py b/tf-ner-poc/src/main/python/normalizer/normalizer.py
index a0eabe8..04be1bb 100644
--- a/tf-ner-poc/src/main/python/normalizer/normalizer.py
+++ b/tf-ner-poc/src/main/python/normalizer/normalizer.py
@@ -211,9 +211,9 @@ def main():
target_char_dict = encode_chars(target_train + target_dev + target_test)
- # TODO: Find better chars for begin and end markers
- target_char_dict['S'] = len(target_char_dict)
- target_char_dict['E'] = len(target_char_dict)
+ # char id 2 is STX (Start of Text), and 3 ETX (End of Text)
+ target_char_dict[chr(2)] = len(target_char_dict)
+ target_char_dict[chr(3)] = len(target_char_dict)
target_dict_rev = {v: k for k, v in target_char_dict.items()}