roywei commented on issue #16785: keras-mxnet training failed with FusedOP
URL: 
https://github.com/apache/incubator-mxnet/issues/16785#issuecomment-552768650
 
 
   This is still failing after 
https://github.com/apache/incubator-mxnet/pull/16781
   
   More of the stack trace:
   
   ```
   [07:21:45] 
/home/ubuntu/mxnet/src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running 
performance tests to find the best convolution algorithm, this can take a 
while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable)
    984/1563 [=================>............] - ETA: 40s - loss: 1.6950 - acc: 
0.3762Traceback (most recent call last):
     File "cifar10_resnet.py", line 426, in <module>
       callbacks=callbacks)
     File "/usr/local/lib/python2.7/dist-packages/keras/legacy/interfaces.py", 
line 91, in wrapper
       return func(*args, **kwargs)
     File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", 
line 1433, in fit_generator
       initial_epoch=initial_epoch)
     File 
"/usr/local/lib/python2.7/dist-packages/keras/engine/training_generator.py", 
line 217, in fit_generator
       class_weight=class_weight)
     File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", 
line 1232, in train_on_batch
       outputs = self.train_function(ins)
     File 
"/usr/local/lib/python2.7/dist-packages/keras/backend/mxnet_backend.py", line 
5590, in train_function
       data, label, _, data_shapes, label_shapes = self._adjust_module(inputs, 
'train')
     File 
"/usr/local/lib/python2.7/dist-packages/keras/backend/mxnet_backend.py", line 
5534, in _adjust_module
       self._module._curr_module.reshape(data_shapes, label_shapes)
     File "/home/ubuntu/mxnet/python/mxnet/module/module.py", line 472, in 
reshape
       self._exec_group.reshape(self._data_shapes, self._label_shapes)
     File "/home/ubuntu/mxnet/python/mxnet/module/executor_group.py", line 397, 
in reshape
       self.bind_exec(data_shapes, label_shapes, reshape=True)
     File "/home/ubuntu/mxnet/python/mxnet/module/executor_group.py", line 373, 
in bind_exec
       allow_up_sizing=True, **dict(data_shapes_i + label_shapes_i))
     File "/home/ubuntu/mxnet/python/mxnet/executor.py", line 458, in reshape
       ctypes.byref(handle)))
     File "/home/ubuntu/mxnet/python/mxnet/base.py", line 255, in check_call
       raise MXNetError(py_str(_LIB.MXGetLastError()))
   mxnet.base.MXNetError: [07:22:37] ../src/nnvm/gradient.cc:213: Operator 
_FusedOp is non-differentiable because it didn't register FGradient attribute.
   Stack trace:
     [bt] (0) 
/home/ubuntu/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x34)
 [0x7fe5ac33dc80]
     [bt] (1) 
/home/ubuntu/mxnet/python/mxnet/../../build/libmxnet.so(+0x812cd86) 
[0x7fe5b1402d86]
     [bt] (2) 
/home/ubuntu/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<nnvm::Graph
 (nnvm::Graph), nnvm::Graph (*)(nnvm::Graph)>::_M_invoke(std::_Any_data const&, 
nnvm::Graph&&)+0x76) [0x7fe5b$
   409119]
     [bt] (3) 
/home/ubuntu/mxnet/python/mxnet/../../build/libmxnet.so(std::function<nnvm::Graph
 (nnvm::Graph)>::operator()(nnvm::Graph) const+0x60) [0x7fe5b1412934]
   [bt] (4) 
/home/ubuntu/mxnet/python/mxnet/../../build/libmxnet.so(nnvm::ApplyPasses(nnvm::Graph,
 std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> >, std::allocator<std::
   __cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > 
> const&)+0x424) [0x7fe5b4cabea5]
     [bt] (5) 
/home/ubuntu/mxnet/python/mxnet/../../build/libmxnet.so(nnvm::ApplyPass(nnvm::Graph,
 std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> 
> const&)+0xca) [0x7fe5b1047e78]
     [bt] (6) 
/home/ubuntu/mxnet/python/mxnet/../../build/libmxnet.so(nnvm::pass::MXGradient(nnvm::Graph,
 std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> >, 
std::vector<nnvm::NodeEntry, std::allocator<
   nnvm::NodeEntry> >, std::vector<nnvm::NodeEntry, 
std::allocator<nnvm::NodeEntry> >, std::function<nnvm::NodeEntry 
(std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> >&&)>, 
std::function<int (nnvm::Node const&)>, std::function<nnvm::NodeEntry (nnvm::NodeEntry const&, 
nnvm::NodeEntry const&)>, std::vector<nnvm::Op const*, std::allocator<nnvm::Op 
const*> >, std::__cxx11::basic_string<char, std::char_traits<char
   >, std::allocator<char> >)+0x693) [0x7fe5b114905c]
     [bt] (7) 
/home/ubuntu/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::exec::GraphExecutor::InitFullGraph(nnvm::Symbol,
 std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > 
const&)+0x6fd) [0x7fe5b1132135]
     [bt] (8) 
/home/ubuntu/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::exec::GraphExecutor::InitGraph(nnvm::Symbol,
 mxnet::Context const&, std::map<std::__cxx11::basic_string<char, 
std::char_traits<char>, std
   ::allocator<char> >, mxnet::Context, 
std::less<std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> > >, 
std::allocator<std::pair<std::__cxx11::basic_string<char, 
std::char_traits<char>,
    std::allocator<char> > const, mxnet::Context> > > const&, 
std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, 
std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, 
std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, 
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)+0xa5) 
[0x7fe5b1138403]
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to