This is an automated email from the ASF dual-hosted git repository.
joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new 37af4c6 Added code to randomly drop a character while training
37af4c6 is described below
commit 37af4c6d42a9affba4f7c9bbc64175768750563f
Author: Suneel Marthi <[email protected]>
AuthorDate: Sun Mar 3 20:46:00 2019 +0100
Added code to randomly drop a character while training
---
tf-ner-poc/src/main/python/namecat/namecat.py | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/tf-ner-poc/src/main/python/namecat/namecat.py b/tf-ner-poc/src/main/python/namecat/namecat.py
index 5ff0dfb..86822c0 100644
--- a/tf-ner-poc/src/main/python/namecat/namecat.py
+++ b/tf-ner-poc/src/main/python/namecat/namecat.py
@@ -150,6 +150,7 @@ def main():
char_set = char_set.union(name)
char_dict = {k: v for v, k in enumerate(char_set)}
+ char_dict[chr(0)] = 0
dropout_keep_prob, char_ids_ph, name_lengths_ph, y_ph = create_placeholders()
@@ -163,7 +164,7 @@ def main():
sess.run(init)
batch_size = 20
- for epoch in range(10):
+ for epoch in range(20):
print("Epoch " + str(epoch))
acc_train = []
@@ -174,6 +175,11 @@ def main():
label_train_batch, name_train_batch, name_train_length = \
mini_batch(label_dict, char_dict, labels_train,
names_train, batch_size, batch_index)
+ # Add char dropout here ...
+ for i, j in np.ndindex(name_train_batch.shape):
+ if random.uniform(0, 1) <= 0.0005:
+ name_train_batch[i][j] = 0
+
feed_dict = {dropout_keep_prob: 0.5, char_ids_ph: name_train_batch, name_lengths_ph: name_train_length, y_ph: label_train_batch}
_, probs = sess.run([train_op, probs_op], feed_dict)