[ https://issues.apache.org/jira/browse/MXNET-867?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Lin Yuan updated MXNET-867: --------------------------- Description: Hi, I need to implement an encoder for a speech recognition model in MXNet that uses a 1D temporal max pooling layer with 'same' padding between successive Bidirectional LSTM layers (as below). Currently, there is no support for 1D max pooling with same padding in MXNet - https://mxnet.incubator.apache.org/api/python/symbol/symbol.html#mxnet.symb... . Could you please implement the required max pooling with 'same' padding support and advise on how to implement the following encoder model in MXNet? Thanks, Sundeep === # network target = "classes" EncKeyTotalDim = 1024 AttNumHeads = 1 EncKeyPerHeadDim = EncKeyTotalDim // AttNumHeads EncValueTotalDim = 2048 EncValuePerHeadDim = EncValueTotalDim // AttNumHeads LstmDim = EncValueTotalDim // 2 network = { "source": {"class": "eval", "eval": "tf.clip_by_value(source(0), -3.0, 3.0)"}, "lstm0_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": 1, "from": ["source"] }, "lstm0_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": -1, "from": ["source"] }, "lstm0_pool": {"class": "pool", "mode": "max", "padding": "same", "pool_size": (2,), "from": ["lstm0_fw", "lstm0_bw"], "trainable": False}, "lstm1_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": 1, "from": ["lstm0_pool"], "dropout": 0.3 }, "lstm1_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": -1, "from": ["lstm0_pool"], "dropout": 0.3 }, "lstm1_pool": {"class": "pool", "mode": "max", "padding": "same", "pool_size": (2,), "from": ["lstm1_fw", "lstm1_bw"], "trainable": False}, "lstm2_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": 1, "from": ["lstm1_pool"], "dropout": 0.3 }, "lstm2_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": -1, "from": ["lstm1_pool"], "dropout": 0.3 }, "lstm2_pool": {"class": "pool", "mode": "max", "padding": "same", "pool_size": (2,), "from": ["lstm2_fw", "lstm2_bw"], "trainable": False}, "lstm3_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": 1, "from": ["lstm2_pool"], "dropout": 0.3 }, "lstm3_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": -1, "from": ["lstm2_pool"], "dropout": 0.3 }, "lstm3_pool": {"class": "pool", "mode": "max", "padding": "same", "pool_size": (1,), "from": ["lstm3_fw", "lstm3_bw"], "trainable": False}, "lstm4_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": 1, "from": ["lstm3_pool"], "dropout": 0.3 }, "lstm4_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": -1, "from": ["lstm3_pool"], "dropout": 0.3 }, "lstm4_pool": {"class": "pool", "mode": "max", "padding": "same", "pool_size": (1,), "from": ["lstm4_fw", "lstm4_bw"], "trainable": False}, "lstm5_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": 1, "from": ["lstm4_pool"], "dropout": 0.3 }, "lstm5_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, "direction": -1, "from": ["lstm4_pool"], "dropout": 0.3 }, "encoder": {"class": "copy", "from": ["lstm5_fw", "lstm5_bw"]}, # dim: EncValueTotalDim === > Pooling1D with "same" padding > ----------------------------- > > Key: MXNET-867 > URL: https://issues.apache.org/jira/browse/MXNET-867 > Project: Apache MXNet > Issue Type: New Feature > Components: Apache MXNet Backend > Reporter: Lin Yuan > Assignee: Lin Yuan > Priority: Major > > Hi, > I need to implement an encoder for a speech recognition model in MXNet that > uses a 1D temporal max pooling layer with 'same' padding between successive > Bidirectional LSTM layers (as below). Currently, there is no support for 1D > max pooling with same padding in MXNet - > https://mxnet.incubator.apache.org/api/python/symbol/symbol.html#mxnet.symb... > . > Could you please implement the required max pooling with 'same' padding > support and advise on how to implement the following encoder model in MXNet? > Thanks, > Sundeep > === > # network > target = "classes" > EncKeyTotalDim = 1024 > AttNumHeads = 1 > EncKeyPerHeadDim = EncKeyTotalDim // AttNumHeads > EncValueTotalDim = 2048 > EncValuePerHeadDim = EncValueTotalDim // AttNumHeads > LstmDim = EncValueTotalDim // 2 > network = { > "source": {"class": "eval", "eval": "tf.clip_by_value(source(0), -3.0, > 3.0)"}, > "lstm0_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": 1, "from": ["source"] }, > "lstm0_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": -1, "from": ["source"] }, > "lstm0_pool": {"class": "pool", "mode": "max", "padding": "same", > "pool_size": (2,), "from": ["lstm0_fw", "lstm0_bw"], "trainable": False}, > "lstm1_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": 1, "from": ["lstm0_pool"], "dropout": 0.3 }, > "lstm1_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": -1, "from": ["lstm0_pool"], "dropout": 0.3 }, > "lstm1_pool": {"class": "pool", "mode": "max", "padding": "same", > "pool_size": (2,), "from": ["lstm1_fw", "lstm1_bw"], "trainable": False}, > "lstm2_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": 1, "from": ["lstm1_pool"], "dropout": 0.3 }, > "lstm2_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": -1, "from": ["lstm1_pool"], "dropout": 0.3 }, > "lstm2_pool": {"class": "pool", "mode": "max", "padding": "same", > "pool_size": (2,), "from": ["lstm2_fw", "lstm2_bw"], "trainable": False}, > "lstm3_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": 1, "from": ["lstm2_pool"], "dropout": 0.3 }, > "lstm3_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": -1, "from": ["lstm2_pool"], "dropout": 0.3 }, > "lstm3_pool": {"class": "pool", "mode": "max", "padding": "same", > "pool_size": (1,), "from": ["lstm3_fw", "lstm3_bw"], "trainable": False}, > "lstm4_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": 1, "from": ["lstm3_pool"], "dropout": 0.3 }, > "lstm4_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": -1, "from": ["lstm3_pool"], "dropout": 0.3 }, > "lstm4_pool": {"class": "pool", "mode": "max", "padding": "same", > "pool_size": (1,), "from": ["lstm4_fw", "lstm4_bw"], "trainable": False}, > "lstm5_fw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": 1, "from": ["lstm4_pool"], "dropout": 0.3 }, > "lstm5_bw" : { "class": "rec", "unit": "nativelstm2", "n_out" : LstmDim, > "direction": -1, "from": ["lstm4_pool"], "dropout": 0.3 }, > "encoder": {"class": "copy", "from": ["lstm5_fw", "lstm5_bw"]}, # dim: > EncValueTotalDim > === -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@mxnet.apache.org For additional commands, e-mail: issues-h...@mxnet.apache.org