This is an automated email from the ASF dual-hosted git repository.

skm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
     new e456dc4  Fix speech recognition example (#12291)
e456dc4 is described below

commit e456dc45ce781bfb08a71d0d2e2b87fcb98250c7
Author: Vandana Kannan <vandan...@users.noreply.github.com>
AuthorDate: Wed Aug 29 21:11:18 2018 -0700

    Fix speech recognition example (#12291)
---
 example/speech_recognition/README.md      |  4 ++--
 example/speech_recognition/deepspeech.cfg |  5 +++--
 example/speech_recognition/default.cfg    |  2 +-
 example/speech_recognition/singleton.py   | 26 ++++++--------------------
 example/speech_recognition/stt_metric.py  |  9 +++++++--
 example/speech_recognition/train.py       | 21 ++++++++++-----------
 6 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/example/speech_recognition/README.md b/example/speech_recognition/README.md
index 00d1666..f95fddf 100644
--- a/example/speech_recognition/README.md
+++ b/example/speech_recognition/README.md
@@ -19,9 +19,9 @@ With rich functionalities and convenience explained above, you can build your ow
 ## **Environments**
 - MXNet version: 0.9.5+
 - GPU memory size: 2.4GB+
-- Install tensorboard for logging
+- Install mxboard for logging
 <pre>
-<code>pip install tensorboard</code>
+<code>pip install mxboard</code>
 </pre>
 - [SoundFile](https://pypi.python.org/pypi/SoundFile/0.8.1) for audio preprocessing (If encounter errors about libsndfile, follow [this tutorial](http://www.linuxfromscratch.org/blfs/view/svn/multimedia/libsndfile.html).)
diff --git a/example/speech_recognition/deepspeech.cfg b/example/speech_recognition/deepspeech.cfg
index ec3af04..69894ae 100644
--- a/example/speech_recognition/deepspeech.cfg
+++ b/example/speech_recognition/deepspeech.cfg
@@ -26,14 +26,15 @@ prefix = deep_bucket
 # when mode is load or predict, model will be loaded from the file name with model_file under checkpoints
 model_file = deep_bucketn_epoch0n_batch-0018
 batch_size = 12
-#batch_size=4
+#use batch_size 4 with single GPU
+#batch_size = 4
 # log will be saved by the log_filename
 log_filename = deep_bucket.log
 # checkpoint set n to save checkpoints after n epoch
 save_checkpoint_every_n_epoch = 1
 save_checkpoint_every_n_batch = 3000
 is_bi_graphemes = True
-tensorboard_log_dir = tblog/deep_bucket
+mxboard_log_dir = mxlog/deep_bucket
 # if random_seed is -1 then it gets random seed from timestamp
 mx_random_seed = -1
 random_seed = -1
diff --git a/example/speech_recognition/default.cfg b/example/speech_recognition/default.cfg
index e4beb83..b0869a9 100644
--- a/example/speech_recognition/default.cfg
+++ b/example/speech_recognition/default.cfg
@@ -31,7 +31,7 @@ log_filename = test.log
 save_checkpoint_every_n_epoch = 20
 save_checkpoint_every_n_batch = 1000
 is_bi_graphemes = False
-tensorboard_log_dir = tblog/libri_sample
+mxboard_log_dir = mxlog/libri_sample
 # if random_seed is -1 then it gets random seed from timestamp
 mx_random_seed = 1234
 random_seed = 1234
diff --git a/example/speech_recognition/singleton.py b/example/speech_recognition/singleton.py
index 1d68edf..01717e4 100644
--- a/example/speech_recognition/singleton.py
+++ b/example/speech_recognition/singleton.py
@@ -19,9 +19,9 @@ from __future__ import print_function
 import logging as log

 class Singleton:
-    def __init__(self, decrated):
-        log.debug("Singleton Init %s" % decrated)
-        self._decorated = decrated
+    def __init__(self, decorated):
+        log.debug("Singleton Init %s" % decorated)
+        self._decorated = decorated

     def getInstance(self):
         try:
@@ -30,25 +30,11 @@ class Singleton:
             self._instance = self._decorated()
             return self._instance

-    def __new__(class_, *args, **kwargs):
+    def __new__(cls, *args, **kwargs):
         print("__new__")
-        class_.instances[class_] = super(Singleton, class_).__new__(class_, *args, **kwargs)
-        return class_.instances[class_]
+        cls._instance = super(Singleton, cls).__new__(cls, *args, **kwargs)
+        return cls._instance

     def __call__(self):
         raise TypeError("Singletons must be accessed through 'getInstance()'")
-
-class SingletonInstane:
-    __instance = None
-
-    @classmethod
-    def __getInstance(cls):
-        return cls.__instance
-
-    @classmethod
-    def instance(cls, *args, **kargs):
-        cls.__instance = cls(*args, **kargs)
-        cls.instance = cls.__getInstance
-        return cls.__instance
-
diff --git a/example/speech_recognition/stt_metric.py b/example/speech_recognition/stt_metric.py
index fc1916b..ec74fc0 100644
--- a/example/speech_recognition/stt_metric.py
+++ b/example/speech_recognition/stt_metric.py
@@ -47,6 +47,7 @@ class STTMetric(mx.metric.EvalMetric):
         self.total_ctc_loss = 0.
         self.batch_loss = 0.
         self.is_logging = is_logging
+
     def update(self, labels, preds):
         check_label_shapes(labels, preds)
         if self.is_logging:
@@ -83,10 +84,15 @@ class STTMetric(mx.metric.EvalMetric):
             if self.is_logging:
                 log.info("loss: %f " % loss)
             self.total_ctc_loss += self.batch_loss
+
     def get_batch_loss(self):
         return self.batch_loss
+
     def get_name_value(self):
-        total_cer = float(self.total_l_dist) / float(self.total_n_label)
+        try:
+            total_cer = float(self.total_l_dist) / float(self.total_n_label)
+        except ZeroDivisionError:
+            total_cer = float('inf')

         return total_cer, self.total_n_label, self.total_l_dist, self.total_ctc_loss
@@ -244,4 +250,3 @@ def char_match_2way(label, pred):
     val = val1_max if val1_max > val2_max else val2_max
     val_matched = val1_max_matched if val1_max > val2_max else val2_max_matched
     return val, val_matched, n_whole_label
-
diff --git a/example/speech_recognition/train.py b/example/speech_recognition/train.py
index 0d04e4e..b1ae50b 100644
--- a/example/speech_recognition/train.py
+++ b/example/speech_recognition/train.py
@@ -16,15 +16,14 @@
 # under the License.

 import sys
-
+import json
 sys.path.insert(0, "../../python")
 import os.path
+#mxboard setting
+from mxboard import SummaryWriter
 import mxnet as mx
 from config_util import get_checkpoint_path, parse_contexts
 from stt_metric import STTMetric
-#tensorboard setting
-from tensorboard import SummaryWriter
-import json
 from stt_bucketing_module import STTBucketingModule
@@ -65,7 +64,7 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
     contexts = parse_contexts(args)
     num_gpu = len(contexts)
     eval_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu, is_logging=enable_logging_validation_metric,is_epoch_end=True)
-    # tensorboard setting
+    # mxboard setting
     loss_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu, is_logging=enable_logging_train_metric,is_epoch_end=False)

     optimizer = args.config.get('optimizer', 'optimizer')
@@ -131,9 +130,9 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
         data_train.reset()
         data_train.is_first_epoch = True

-    #tensorboard setting
-    tblog_dir = args.config.get('common', 'tensorboard_log_dir')
-    summary_writer = SummaryWriter(tblog_dir)
+    #mxboard setting
+    mxlog_dir = args.config.get('common', 'mxboard_log_dir')
+    summary_writer = SummaryWriter(mxlog_dir)

     while True:
@@ -144,7 +143,7 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
         for nbatch, data_batch in enumerate(data_train):
             module.forward_backward(data_batch)
             module.update()
-            # tensorboard setting
+            # mxboard setting
             if (nbatch + 1) % show_every == 0:
                 module.update_metric(loss_metric, data_batch.label)
             #summary_writer.add_scalar('loss batch', loss_metric.get_batch_loss(), nbatch)
@@ -160,7 +159,7 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
             module.forward(data_batch, is_train=True)
             module.update_metric(eval_metric, data_batch.label)

-        # tensorboard setting
+        # mxboard setting
         val_cer, val_n_label, val_l_dist, _ = eval_metric.get_name_value()
         log.info("Epoch[%d] val cer=%f (%d / %d)", n_epoch, val_cer, int(val_n_label - val_l_dist), val_n_label)
         curr_acc = val_cer
@@ -170,7 +169,7 @@ def do_training(args, module, data_train, data_val, begin_epoch=0):
         data_train.reset()
         data_train.is_first_epoch = False

-        # tensorboard setting
+        # mxboard setting
         train_cer, train_n_label, train_l_dist, train_ctc_loss = loss_metric.get_name_value()
         summary_writer.add_scalar('loss epoch', train_ctc_loss, n_epoch)
         summary_writer.add_scalar('CER train', train_cer, n_epoch)
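
For reference, the rewritten singleton.py implements a decorator-based singleton: Singleton wraps a class, lazily constructs one shared instance on the first getInstance() call, and raises TypeError on direct calls. A minimal self-contained sketch of that pattern follows; the __new__ override is omitted here so the sketch also runs under Python 3 (the example itself targets the Python 2-era codebase), and ConfigStore is an illustrative name, not part of the example:

    # Sketch of the decorator-based singleton pattern from singleton.py.
    # __new__ is dropped for Python 3 compatibility; ConfigStore is a
    # hypothetical class used only for illustration.
    class Singleton:
        def __init__(self, decorated):
            self._decorated = decorated

        def getInstance(self):
            try:
                return self._instance
            except AttributeError:
                self._instance = self._decorated()
                return self._instance

        def __call__(self):
            raise TypeError("Singletons must be accessed through 'getInstance()'")

    @Singleton
    class ConfigStore:
        def __init__(self):
            self.values = {}

    a = ConfigStore.getInstance()
    b = ConfigStore.getInstance()
    assert a is b  # one shared instance
    # ConfigStore() would raise TypeError via Singleton.__call__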
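
The stt_metric.py change guards the character-error-rate computation against an empty accumulator, reporting an infinite CER instead of crashing when no labels have been seen. A short sketch of the same guard in isolation (compute_total_cer is a hypothetical helper, not a function in the example):

    # Guarded CER: avoid ZeroDivisionError when total_n_label == 0.
    def compute_total_cer(total_l_dist, total_n_label):
        try:
            return float(total_l_dist) / float(total_n_label)
        except ZeroDivisionError:
            return float('inf')

    print(compute_total_cer(12, 100))  # 0.12
    print(compute_total_cer(0, 0))     # inf (no labels accumulated yet)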
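
The train.py change swaps the standalone tensorboard package for mxboard while keeping the same scalar-logging flow: open a SummaryWriter on the configured directory and call add_scalar once per epoch. A minimal sketch of that flow, assuming placeholder loss values (mxboard writes TensorBoard-compatible event files, so the logs can still be viewed with the TensorBoard frontend):

    # Minimal mxboard logging loop mirroring train.py. The directory
    # matches mxboard_log_dir in deepspeech.cfg; the loss values are
    # placeholders for illustration only.
    from mxboard import SummaryWriter

    summary_writer = SummaryWriter('mxlog/deep_bucket')
    for n_epoch in range(3):
        train_ctc_loss = 1.0 / (n_epoch + 1)  # placeholder value
        summary_writer.add_scalar('loss epoch', train_ctc_loss, n_epoch)
    summary_writer.close()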