This is an automated email from the ASF dual-hosted git repository.

joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


The following commit(s) were added to refs/heads/master by this push:
     new 37af4c6  Added code to randomly drop a character while training
37af4c6 is described below

commit 37af4c6d42a9affba4f7c9bbc64175768750563f
Author: Suneel Marthi <[email protected]>
AuthorDate: Sun Mar 3 20:46:00 2019 +0100

    Added code to randomly drop a character while training
---
 tf-ner-poc/src/main/python/namecat/namecat.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tf-ner-poc/src/main/python/namecat/namecat.py b/tf-ner-poc/src/main/python/namecat/namecat.py
index 5ff0dfb..86822c0 100644
--- a/tf-ner-poc/src/main/python/namecat/namecat.py
+++ b/tf-ner-poc/src/main/python/namecat/namecat.py
@@ -150,6 +150,7 @@ def main():
         char_set = char_set.union(name)
 
     char_dict = {k: v for v, k in enumerate(char_set)}
+    char_dict[chr(0)] = 0
 
     dropout_keep_prob, char_ids_ph, name_lengths_ph, y_ph = create_placeholders()
 
@@ -163,7 +164,7 @@ def main():
         sess.run(init)
 
         batch_size = 20
-        for epoch in range(10):
+        for epoch in range(20):
             print("Epoch " + str(epoch))
             acc_train = []
 
@@ -174,6 +175,11 @@ def main():
                 label_train_batch, name_train_batch, name_train_length = \
                     mini_batch(label_dict, char_dict, labels_train, names_train, batch_size, batch_index)
 
+                # Add char dropout here ...
+                for i, j in np.ndindex(name_train_batch.shape):
+                    if random.uniform(0, 1) <= 0.0005:
+                        name_train_batch[i][j] = 0
+
                 feed_dict = {dropout_keep_prob: 0.5, char_ids_ph: name_train_batch, name_lengths_ph: name_train_length, y_ph: label_train_batch}
                 _, probs = sess.run([train_op, probs_op], feed_dict)
 

Reply via email to