Hi,
I'm new to neural networks and even newer to recurrent networks.
I tried to implement a very basic recurrent network, just to see if it
can learn a noisy sine wave.

Input data:
    A, T, epsilon, l = 10, 10, 1, 3000
    data = A*np.sin(2*np.pi*np.arange(l)/T) + np.random.normal(0, epsilon, l)
    train_features = data[:2000]
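
As a quick check on the data itself (assuming np is mxnet.np, as in the complete listing below), the signal should have a standard deviation of roughly sqrt(A^2/2 + epsilon^2) ≈ 7.1:

    # Sanity check on the generated signal: amplitude-10 sine plus unit Gaussian noise
    print(data.shape, float(np.mean(data)), float(np.std(data)))
    # expected: (3000,), mean close to 0, std around 7.1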

Recurrent network model:

    class RnnTest(nn.HybridBlock):
        def __init__(self, size_hidden, **kwargs):
            super().__init__(**kwargs)
            with self.name_scope():
                self.size_hidden = size_hidden
                self.rnn = rnn.RNN(self.size_hidden)
                self.dense = nn.Dense(1)
        
        def hybrid_forward(self, F, x, state):
            out, state = self.rnn(x, state)
            return self.dense(out.reshape(-1, self.size_hidden)), state
        
        def begin_state(self, *args, **kwargs):
            return self.rnn.begin_state(*args, **kwargs)
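
Just to show the shapes I am assuming (time-major input, which should be the default 'TNC' layout of rnn.RNN), here is a minimal sketch of how I call the model:

    # Minimal shape sketch (assuming np is mxnet.np, as in the listing below):
    # a (num_steps, batch_size, 1) input should give a
    # (num_steps * batch_size, 1) prediction after the Dense layer.
    m = RnnTest(size_hidden=16)
    m.initialize()
    s = m.begin_state(batch_size=4)
    p, s = m(np.ones((10, 4, 1)), s)
    print(p.shape)   # expected: (40, 1)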


For training, I used the L2Loss from gluon:

    size_hidden=16
    num_epochs, lr, clip, batch_size, num_steps = 500, 0.01, 10, 32, 10
    model = RnnTest(size_hidden)
    model.initialize(init.Normal(sigma=0.1), force_reinit=True)
    model.hybridize()
    adam_optimizer = mx.optimizer.Adam(clip_gradient=clip, learning_rate=lr, wd=0)
    trainer = gluon.Trainer(model.collect_params(), optimizer=adam_optimizer)
    loss = gluon.loss.L2Loss()

From that, I iterate over the data (the complete code is just below).
But in the end, the loss oscillates around a constant value corresponding to
the mean of the data (or equivalently zero, since the data are sinusoidal).
I spent a lot of time playing with the hyperparameters and with the optimizer
(I first used SGD), without any improvement.
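
To make "predicting the mean" concrete: if I am not mistaken, gluon's L2Loss is 0.5*(pred - label)^2, so a model that always outputs the data mean (about 0) should plateau around 0.5*(A^2/2 + epsilon^2) = 0.5*(50 + 1) ≈ 25.5:

    # Rough baseline: expected L2Loss of a constant predictor equal to the data mean
    baseline = float(np.mean(0.5 * (train_features - np.mean(train_features)) ** 2))
    print(baseline)   # roughly 25.5 for A=10, epsilon=1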

Here is the complete code (I used some convenience functions from the d2l.ai
book):

    import mxnet as mx
    from mxnet import gluon, init, np, npx, autograd
    npx.set_np()
    from mxnet.gluon import nn, rnn
    from d2l import mxnet as d2l
    import random

    A, T, epsilon, l = 10, 10, 1, 3000
    data = A*np.sin(2*np.pi*np.arange(l)/T) + np.random.normal(0, epsilon, l)
    train_features = data[:2000]

    def create_random_iter(data, batch_size, num_steps):
        offset = random.randint(0, num_steps)
        data = data[offset:]
        num_examples = ((len(data) - 1) // num_steps)
        example_indices = list(range(0, num_examples * num_steps, num_steps))
        random.shuffle(example_indices)

        num_batches = num_examples // batch_size
        for i in range(0, batch_size * num_batches, batch_size):
            batch_indices = example_indices[i:(i+batch_size)]
            X = np.stack([data[j: j+num_steps] for j in batch_indices]).T
            Y = np.stack([data[j+1: j+1+num_steps] for j in batch_indices]).T
            yield X.reshape(X.shape[0], batch_size, 1), Y.reshape(Y.shape[0], batch_size, 1)
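
    # Sanity check (not part of the training itself): each minibatch yielded
    # above should be time-major, i.e. (num_steps, batch_size, 1) for both X and Y.
    X_chk, Y_chk = next(create_random_iter(train_features, batch_size=32, num_steps=10))
    print(X_chk.shape, Y_chk.shape)   # expected: (10, 32, 1) (10, 32, 1)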

            
    class RnnTest(nn.HybridBlock):
        def __init__(self, size_hidden, **kwargs):
            super().__init__(**kwargs)
            with self.name_scope():
                self.size_hidden = size_hidden
                self.rnn = rnn.RNN(self.size_hidden)
                self.dense = nn.Dense(1)

        def hybrid_forward(self, F, x, state):
            out, state = self.rnn(x, state)
            return self.dense(out.reshape(-1, self.size_hidden)), state

        def begin_state(self, *args, **kwargs):
            return self.rnn.begin_state(*args, **kwargs)

        
    def train_epoch(model, train_features, loss, trainer, batch_size=8, num_steps=30, use_random_iter=True):
        train_iter = create_random_iter(data, batch_size=batch_size, num_steps=num_steps)
        metric = d2l.Accumulator(2)
        for X, Y in train_iter:
            state = model.begin_state(batch_size=batch_size)
            y = Y.T.reshape(-1, 1)
            with autograd.record():
                pred, state = model(X, state)
                l = loss(pred, y)
            l.backward()
            trainer.step(batch_size=batch_size)
            metric.add(float(l.mean()) * Y.shape[0], Y.shape[0])
        return metric[0]/metric[1]

    def train(model, train_features, loss, trainer, batch_size=4, num_steps=10):
        animator = d2l.Animator(xlabel='epoch', ylabel='loss', legend=['train'], xlim=[1, num_epochs])
        for i in range(num_epochs):
            err = train_epoch(model, train_features, loss, trainer, batch_size=batch_size, num_steps=num_steps)
            if i % 10 == 0:
                print(err)
                animator.add(i+1, [err])
        print('final loss:', err)

    size_hidden=16
    num_epochs, lr, clip, batch_size, num_steps = 100, 0.01, 10, 32, 10
    model = RnnTest(size_hidden)
    model.initialize(init.Normal(sigma=0.1), force_reinit=True)
    model.hybridize()
    adam_optimizer = mx.optimizer.Adam(clip_gradient=clip, learning_rate=lr, wd=0)
    trainer = gluon.Trainer(model.collect_params(), optimizer=adam_optimizer)
    loss = gluon.loss.L2Loss()


    train(model, train_features, loss, trainer, batch_size=batch_size, num_steps=num_steps)
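
    # Quick look (hedged, not part of the training loop): what the trained model
    # predicts on one training-shaped batch, to compare against the
    # constant-mean behaviour described above.
    Xb, Yb = next(create_random_iter(train_features, batch_size=batch_size, num_steps=num_steps))
    state = model.begin_state(batch_size=batch_size)
    pred, _ = model(Xb, state)
    print(pred[:5].reshape(-1))   # predictions for the first time step
    print(Yb[0, :5].reshape(-1))  # corresponding targets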




