This is an automated email from the ASF dual-hosted git repository.
joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new 5914581 Remove new lines chars training data in namecat
5914581 is described below
commit 5914581e40574817fca0557ddedff887aa8dfc96
Author: Jörn Kottmann <[email protected]>
AuthorDate: Mon Feb 18 11:26:33 2019 +0100
Remove new lines chars training data in namecat
---
tf-ner-poc/src/main/python/namecat/namecat.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tf-ner-poc/src/main/python/namecat/namecat.py b/tf-ner-poc/src/main/python/namecat/namecat.py
index 7e3abf3..5ff0dfb 100644
--- a/tf-ner-poc/src/main/python/namecat/namecat.py
+++ b/tf-ner-poc/src/main/python/namecat/namecat.py
@@ -28,13 +28,13 @@ import os
from tempfile import TemporaryDirectory
def load_data(file):
- with open(file) as f:
+ with open(file, encoding="utf-8") as f:
labels = []
names = []
for line in f:
parts = re.split(r'\t+', line)
- labels.append(parts[0]);
- names.append(parts[1])
+ labels.append(parts[0].strip())
+ names.append(parts[1].strip())
return labels, names
# create placeholders