szha edited a comment on issue #17038: use env var to control stack trace logging URL: https://github.com/apache/incubator-mxnet/pull/17038#issuecomment-564732017 @larroy 10 has been the existing limit. These changes were tested locally by triggering errors and comparing results. <details> <summary>example</summary> ```python In [1]: import mxnet as mx In [2]: mx.np.transpose(mx.np.ones((3,4)), (0, 0)) Out[2]: --------------------------------------------------------------------------- MXNetError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj) 700 type_pprinters=self.type_printers, 701 deferred_pprinters=self.deferred_printers) --> 702 printer.pretty(obj) 703 printer.flush() 704 return stream.getvalue() /usr/local/lib/python3.7/site-packages/IPython/lib/pretty.py in pretty(self, obj) 400 if cls is not object \ 401 and callable(cls.__dict__.get('__repr__')): --> 402 return _repr_pprint(obj, self, cycle) 403 404 return _default_pprint(obj, self, cycle) /usr/local/lib/python3.7/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle) 695 """A pprint that just redirects to the normal repr function.""" 696 # Find newlines and replace them with p.break_() --> 697 output = repr(obj) 698 for idx,output_line in enumerate(output.splitlines()): 699 if idx: ~/mxnet-distro/mxnet-build/python/mxnet/numpy/multiarray.py in __repr__(self) 914 [0.84426576 0.60276335 0.85794562]] @gpu(0) 915 """ --> 916 array_str = self.asnumpy().__repr__() 917 dtype = self.dtype 918 if 'dtype=' in array_str: ~/mxnet-distro/mxnet-build/python/mxnet/ndarray/ndarray.py in asnumpy(self) 2550 self.handle, 2551 data.ctypes.data_as(ctypes.c_void_p), -> 2552 ctypes.c_size_t(data.size))) 2553 return data 2554 ~/mxnet-distro/mxnet-build/python/mxnet/base.py in check_call(ret) 276 """ 277 if ret != 0: --> 278 raise MXNetError(py_str(_LIB.MXGetLastError())) 279 280 MXNetError: [13:01:41] /Users/zhasheng/mxnet-distro/mxnet-build/3rdparty/mshadow/mshadow/./tensor_cpu-inl.h:130: Check failed: _dst.shape_ == _src.shape_ ((3,3) vs. (3,4)) : Copy:shape mismatch:(3,3) vs (3,4) In [3]: os.environ['DMLC_LOG_STACK_TRACE_DEPTH'] = '10' --------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-3-0581fcdc8124> in <module> ----> 1 os.environ['DMLC_LOG_STACK_TRACE_DEPTH'] = '10' NameError: name 'os' is not defined In [4]: Do you really want to exit ([y]/n)? ^D [zhash...@a483e79ab3ab.ant][~/mxnet-distro/mxnet-build]% ipython Python 3.7.4 (default, Oct 12 2019, 19:06:48) Type 'copyright', 'credits' or 'license' for more information IPython 7.9.0 -- An enhanced Interactive Python. Type '?' for help. In [1]: import mxnet as mx In [2]: mx.np.transpose(mx.np.ones((3,4)), (0, 0)) Out[2]: --------------------------------------------------------------------------- MXNetError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj) 700 type_pprinters=self.type_printers, 701 deferred_pprinters=self.deferred_printers) --> 702 printer.pretty(obj) 703 printer.flush() 704 return stream.getvalue() /usr/local/lib/python3.7/site-packages/IPython/lib/pretty.py in pretty(self, obj) 400 if cls is not object \ 401 and callable(cls.__dict__.get('__repr__')): --> 402 return _repr_pprint(obj, self, cycle) 403 404 return _default_pprint(obj, self, cycle) /usr/local/lib/python3.7/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle) 695 """A pprint that just redirects to the normal repr function.""" 696 # Find newlines and replace them with p.break_() --> 697 output = repr(obj) 698 for idx,output_line in enumerate(output.splitlines()): 699 if idx: ~/mxnet-distro/mxnet-build/python/mxnet/numpy/multiarray.py in __repr__(self) 914 [0.84426576 0.60276335 0.85794562]] @gpu(0) 915 """ --> 916 array_str = self.asnumpy().__repr__() 917 dtype = self.dtype 918 if 'dtype=' in array_str: ~/mxnet-distro/mxnet-build/python/mxnet/ndarray/ndarray.py in asnumpy(self) 2550 self.handle, 2551 data.ctypes.data_as(ctypes.c_void_p), -> 2552 ctypes.c_size_t(data.size))) 2553 return data 2554 ~/mxnet-distro/mxnet-build/python/mxnet/base.py in check_call(ret) 276 """ 277 if ret != 0: --> 278 raise MXNetError(py_str(_LIB.MXGetLastError())) 279 280 MXNetError: [13:01:56] /Users/zhasheng/mxnet-distro/mxnet-build/3rdparty/mshadow/mshadow/./tensor_cpu-inl.h:130: Check failed: _dst.shape_ == _src.shape_ ((3,3) vs. (3,4)) : Copy:shape mismatch:(3,3) vs (3,4) In [3]: import os; os.environ['DMLC_LOG_STACK_TRACE_DEPTH'] = '10' In [4]: mx.np.transpose(mx.np.ones((3,4)), (0, 0)) Out[4]: --------------------------------------------------------------------------- MXNetError Traceback (most recent call last) /usr/local/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj) 700 type_pprinters=self.type_printers, 701 deferred_pprinters=self.deferred_printers) --> 702 printer.pretty(obj) 703 printer.flush() 704 return stream.getvalue() /usr/local/lib/python3.7/site-packages/IPython/lib/pretty.py in pretty(self, obj) 400 if cls is not object \ 401 and callable(cls.__dict__.get('__repr__')): --> 402 return _repr_pprint(obj, self, cycle) 403 404 return _default_pprint(obj, self, cycle) /usr/local/lib/python3.7/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle) 695 """A pprint that just redirects to the normal repr function.""" 696 # Find newlines and replace them with p.break_() --> 697 output = repr(obj) 698 for idx,output_line in enumerate(output.splitlines()): 699 if idx: ~/mxnet-distro/mxnet-build/python/mxnet/numpy/multiarray.py in __repr__(self) 914 [0.84426576 0.60276335 0.85794562]] @gpu(0) 915 """ --> 916 array_str = self.asnumpy().__repr__() 917 dtype = self.dtype 918 if 'dtype=' in array_str: ~/mxnet-distro/mxnet-build/python/mxnet/ndarray/ndarray.py in asnumpy(self) 2550 self.handle, 2551 data.ctypes.data_as(ctypes.c_void_p), -> 2552 ctypes.c_size_t(data.size))) 2553 return data 2554 ~/mxnet-distro/mxnet-build/python/mxnet/base.py in check_call(ret) 276 """ 277 if ret != 0: --> 278 raise MXNetError(py_str(_LIB.MXGetLastError())) 279 280 MXNetError: [13:02:07] /Users/zhasheng/mxnet-distro/mxnet-build/3rdparty/mshadow/mshadow/./tensor_cpu-inl.h:130: Check failed: _dst.shape_ == _src.shape_ ((3,3) vs. (3,4)) : Copy:shape mismatch:(3,3) vs (3,4) Stack trace: [bt] (0) 1 libmxnet.so 0x00000001170b407e dmlc::LogMessageFatal::~LogMessageFatal() + 110 [bt] (1) 2 libmxnet.so 0x0000000117309dca void mshadow::Copy<2, float>(mshadow::Tensor<mshadow::cpu, 2, float>, mshadow::Tensor<mshadow::cpu, 2, float> const&, mshadow::Stream<mshadow::cpu>*) + 474 [bt] (2) 3 libmxnet.so 0x0000000117af49f8 void mxnet::op::TransposeImpl<mshadow::cpu>(mxnet::RunContext, mxnet::TBlob const&, mxnet::TBlob const&, mxnet::TShape const&) + 5400 [bt] (3) 4 libmxnet.so 0x0000000117f6e49e void mxnet::op::NumpyTranspose<mshadow::cpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::__1::vector<mxnet::TBlob, std::__1::allocator<mxnet::TBlob> > const&, std::__1::vector<mxnet::OpReqType, std::__1::allocator<mxnet::OpReqType> > const&, std::__1::vector<mxnet::TBlob, std::__1::allocator<mxnet::TBlob> > const&) + 398 [bt] (4) 5 libmxnet.so 0x00000001192e0f62 mxnet::imperative::PushFCompute(std::__1::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::__1::vector<mxnet::TBlob, std::__1::allocator<mxnet::TBlob> > const&, std::__1::vector<mxnet::OpReqType, std::__1::allocator<mxnet::OpReqType> > const&, std::__1::vector<mxnet::TBlob, std::__1::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::__1::vector<mxnet::engine::Var*, std::__1::allocator<mxnet::engine::Var*> > const&, std::__1::vector<mxnet::engine::Var*, std::__1::allocator<mxnet::engine::Var*> > const&, std::__1::vector<mxnet::Resource, std::__1::allocator<mxnet::Resource> > const&, std::__1::vector<mxnet::NDArray*, std::__1::allocator<mxnet::NDArray*> > const&, std::__1::vector<mxnet::NDArray*, std::__1::allocator<mxnet::NDArray*> > const&, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> > const&, std::__1::vector<mxnet::OpReqType, std::__1::allocator<mxnet::OpReqType> > const&)::'lambda'(mxnet::RunContext)::operator()(mxnet::RunContext) const + 706 [bt] (5) 6 libmxnet.so 0x00000001192e051d std::__1::__function::__func<mxnet::imperative::PushFCompute(std::__1::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::__1::vector<mxnet::TBlob, std::__1::allocator<mxnet::TBlob> > const&, std::__1::vector<mxnet::OpReqType, std::__1::allocator<mxnet::OpReqType> > const&, std::__1::vector<mxnet::TBlob, std::__1::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::__1::vector<mxnet::engine::Var*, std::__1::allocator<mxnet::engine::Var*> > const&, std::__1::vector<mxnet::engine::Var*, std::__1::allocator<mxnet::engine::Var*> > const&, std::__1::vector<mxnet::Resource, std::__1::allocator<mxnet::Resource> > const&, std::__1::vector<mxnet::NDArray*, std::__1::allocator<mxnet::NDArray*> > const&, std::__1::vector<mxnet::NDArray*, std::__1::allocator<mxnet::NDArray*> > const&, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> > const&, std::__1::vector<mxnet::OpReqType, std::__1::allocator<mxnet::OpReqType> > const&)::'lambda'(mxnet::RunContext), std::__1::allocator<mxnet::imperative::PushFCompute(std::__1::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::__1::vector<mxnet::TBlob, std::__1::allocator<mxnet::TBlob> > const&, std::__1::vector<mxnet::OpReqType, std::__1::allocator<mxnet::OpReqType> > const&, std::__1::vector<mxnet::TBlob, std::__1::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::__1::vector<mxnet::engine::Var*, std::__1::allocator<mxnet::engine::Var*> > const&, std::__1::vector<mxnet::engine::Var*, std::__1::allocator<mxnet::engine::Var*> > const&, std::__1::vector<mxnet::Resource, std::__1::allocator<mxnet::Resource> > const&, std::__1::vector<mxnet::NDArray*, std::__1::allocator<mxnet::NDArray*> > const&, std::__1::vector<mxnet::NDArray*, std::__1::allocator<mxnet::NDArray*> > const&, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> > const&, std::__1::vector<mxnet::OpReqType, std::__1::allocator<mxnet::OpReqType> > const&)::'lambda'(mxnet::RunContext)>, void (mxnet::RunContext)>::operator()(mxnet::RunContext&&) + 29 [bt] (6) 7 libmxnet.so 0x0000000119235567 std::__1::__function::__func<mxnet::engine::ThreadedEngine::PushSync(std::__1::function<void (mxnet::RunContext)>, mxnet::Context, std::__1::vector<mxnet::engine::Var*, std::__1::allocator<mxnet::engine::Var*> > const&, std::__1::vector<mxnet::engine::Var*, std::__1::allocator<mxnet::engine::Var*> > const&, mxnet::FnProperty, int, char const*)::$_1, std::__1::allocator<mxnet::engine::ThreadedEngine::PushSync(std::__1::function<void (mxnet::RunContext)>, mxnet::Context, std::__1::vector<mxnet::engine::Var*, std::__1::allocator<mxnet::engine::Var*> > const&, std::__1::vector<mxnet::engine::Var*, std::__1::allocator<mxnet::engine::Var*> > const&, mxnet::FnProperty, int, char const*)::$_1>, void (mxnet::RunContext, mxnet::engine::CallbackOnComplete)>::operator()(mxnet::RunContext&&, mxnet::engine::CallbackOnComplete&&) + 55 [bt] (7) 8 libmxnet.so 0x0000000119238dee mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*) + 702 [bt] (8) 9 libmxnet.so 0x000000011923ce65 mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::'lambda'()::operator()() const::'lambda'(std::__1::shared_ptr<dmlc::ManualEvent>)::operator()(std::__1::shared_ptr<dmlc::ManualEvent>) const + 181 [bt] (9) 10 libmxnet.so 0x000000011923cd09 std::__1::__function::__func<mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::'lambda'()::operator()() const::'lambda'(std::__1::shared_ptr<dmlc::ManualEvent>), std::__1::allocator<mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::'lambda'()::operator()() const::'lambda'(std::__1::shared_ptr<dmlc::ManualEvent>)>, void (std::__1::shared_ptr<dmlc::ManualEvent>)>::operator()(std::__1::shared_ptr<dmlc::ManualEvent>&&) + 41 ``` </details>
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services